---
title: "Replication: Table 2 and Figure S1"
output: 
  bookdown::html_document2:
    highlight: tango
    number_sections: yes
    theme: united
    code_folding: hide
    toc: yes
date: "8 Nov 2020"
author: "Ashish Shenoy (editing Macartan Humphreys)"
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}
 knitr::opts_chunk$set(echo = TRUE)
 options(knitr.kable.NA = '')
library(readstata13)
library(DeclareDesign)
 library(knitr)
library(Hmisc)
library(tidyverse)
library(haven)
library(patchwork)
library(ggplot2)
library(gridExtra)
library(forcats)
# knitr::opts_knit$set(root.dir = '../')
outpath <- ""
```

# Table 2

## Data Prep and Check



```{r, message = FALSE, warning = FALSE}
# Prep data
common_vars <- c("id", "study", "weight", "cluster", "employmentdrop", "incomedrop", "healthaccess", "missedmeals", "anysupport", "accessmarkets", "poor")

data <- list()

# BGD1
bgd1_median <- read_dta("../data/BGD1.dta") %>% .$hh_consumption %>% median(., na.rm = T)
data$BGD1 <- readstata13::read.dta13("../data/BGD1.dta") %>% 
    filter(covid_sample == 1) %>% 
    mutate(missedmeals = ifelse(is.na(ec_q4_mechanism_3), 0 , ifelse(ec_q4_mechanism_3 == "Yes",1, 0)),
           anysupport  = ifelse(is.na(ec_q4_mechanism_7), 0 , ifelse(ec_q4_mechanism_7 == "Yes",1, 0)),
           accessmarkets = ifelse(is.na(ec_q2_buying_3), 0 , ifelse(ec_q2_buying_3 == "Yes",1, 0)),
           employmentdrop = 1*(!is.na(employment_count_bas) & ec_q8_1 == "No"),
           incomedrop = as.numeric(ec_q11 < ec_q12), 
           poor = as.numeric(hh_consumption < bgd1_median),
           id = as.character(1:n()),  
           study = "BGD1") %>% 
    select(one_of(common_vars))


# BGD2
bgd2_median <- read_dta("../data/BGD2.dta") %>% .$hh_consumption %>%  median(., na.rm = T)
data$BGD2 <- readstata13::read.dta13("../data/BGD2.dta", convert.factors = FALSE) %>% 
    filter(covid_sample == 1) %>% 
    mutate(anysupport  = ifelse(is.na(ec_q4_mechanism_7), 0 , ec_q4_mechanism_7),
           accessmarkets = ifelse(is.na(ec_q2_buying_3), 0 , ec_q2_buying_3),
           missedmeals = ifelse(is.na(ec_q4_mechanism_2), 0 , ec_q4_mechanism_2),
           incomedrop = 1*((ec_q15_pastmonth < ec_q12_wage) | 
                          (ec_q21_profitlast < ec_q18_monthprofit)),
           incomedrop = ifelse(
             is.na(incomedrop) & 
              ((!is.na(ec_q15_pastmonth) & !is.na(ec_q12_wage)) |
              (!is.na(ec_q21_profitlast) & !is.na(ec_q18_monthprofit))), 
             0,incomedrop),
           poor = as.numeric(hh_consumption < bgd2_median),
           employmentdrop = ((lm14_wage_or_not == 1 )) & (ec_q11 != 1 | is.na(ec_q11)), 
           employmentdrop = ifelse(is.na(lm14_wage_or_not), 0, employmentdrop), 
           id = as.character(1:n()), 
           cluster = cluster2, 
           weight = hh_weight_covid, 
           study = "BGD2") %>% 
    select(one_of(common_vars))



# BGD3
bgd3_median <- read_dta("../data/BGD3.dta") %>% .$hh_consumption %>%  median(., na.rm = T)
data$BGD3 <- readstata13::read.dta13("../data/BGD3.dta", convert.factors = FALSE) %>% 
    filter(covid_sample == 1) %>% 
    rename(anysupport  = ec_q4_mechanism_7,
           accessmarkets = ec_q2_buying_3,
           missedmeals = ec_q4_mechanism_2) %>%
    mutate(
      # 0 coded if not a drop in *either* and if some comparative data in one
      incomedrop = 1*((ec_q15_pastmonth < ec_q12_wage) | 
                          (ec_q21_profitlast < ec_q18_monthprofit)),
      incomedrop = ifelse(is.na(incomedrop) & 
              ((!is.na(ec_q15_pastmonth) & !is.na(ec_q12_wage)) |
              (!is.na(ec_q21_profitlast) & !is.na(ec_q18_monthprofit))), 
             0,incomedrop),
      
    # Filling in decisions on missing  
    missedmeals = ifelse(is.na(missedmeals), 0 , missedmeals),
    accessmarkets = ifelse(is.na(accessmarkets), 0 , accessmarkets),
    poor = 1*(hh_consumption < bgd3_median),
    employmentdrop = ((lm14_wage_or_not == 1 )) & (ec_q11 != 1 | is.na(ec_q11)), 
    employmentdrop = ifelse(is.na(lm14_wage_or_not), 0, employmentdrop), 

    id = as.character(1:n()),
    weight = hh_weight_covid,
    cluster = cluster2,
    study = "BGD3") %>% 
  
    select(one_of(common_vars))

# BGD4
bgd4_median <- read_dta("../data/BGD4.dta") %>% .$hh_consumption %>%  median(., na.rm = T)
data$BGD4 <- readstata13::read.dta13("../data/BGD4.dta") %>% 
    filter(covid_sample == 1) %>% 
    mutate(anysupport  = ifelse(is.na(ec_q8_5), 0 , ifelse(ec_q8_5 == "Yes",1, 0)),
           accessmarkets = ifelse(is.na(ec_q2_buying_3), 0 , ifelse(ec_q2_buying_3 == "Yes",1, 0)),
           missedmeals = ifelse(is.na(ec_q4_mechanism_2), 0 , ifelse(ec_q4_mechanism_2 == "Yes",1, 0)),
           incomedrop = ifelse(ec_q11 < ec_q12, 1, 0),
           ec_q8_1 = ifelse(ec_q8_1 == "Yes", 1, 0),
           wage_employment = ifelse(wage_employment == "Yes", 1, 0),
           poor = as.numeric(hh_consumption <= bgd4_median)) %>%
    # Note different definition of employment drop compared to BGD3
    mutate(employmentdrop = ifelse(ec_q8_1 < wage_employment, 1, 0), 
           id = as.character(1:n()), study = "BGD4") %>% 
    select(one_of(common_vars))

# BGD5
# Restricted to the COVID sample (sample size 5309 -> 294);
# Results reproduced
bgd5_median <- read_dta("../data/BGD5.dta") %>% .$hh_inc %>%  median(., na.rm = T)
data$BGD5 <- readstata13::read.dta13("../data/BGD5.dta") %>% 
    filter(covid_sample == 1) %>% 
    mutate(anysupport  = ifelse(is.na(ec_q8_1), 0 , ifelse(ec_q8_1 == "Yes",1, 0)),
           accessmarkets = ifelse(is.na(ec_q2_buying_3), 0 , ifelse(ec_q2_buying_3 == "Yes",1, 0)),
           missedmeals = ifelse(as.numeric(ec_q5a) %in% c(2,3,4)|as.numeric(ec_q5b) %in% c(2,3,4), 1, 0),
           incomedrop = ifelse(ec_q24_b < hh_inc, 1, 0),
           employmentdrop = NA,
           poor = ifelse(hh_inc < bgd5_median, 1, 0)) %>%
    mutate(id = as.character(1:n()), study = "BGD5") %>% 
    select(one_of(common_vars))

# BFA1
data$BFA <- readstata13::read.dta13("../data/BFA_maintable.dta") %>%
    mutate(id = as.character(1:n()), weight = LSMS_WEIGHT, study = "BFA1") %>% 
    select(one_of(common_vars))

# COL1
data$COL1 <- readstata13::read.dta13("../data/COL_maintable.dta") %>% 
    mutate(id = as.character(1:n()), weight = LSMS_WEIGHT, study = "COL1") %>% 
    select(one_of(common_vars))

# GHA1
data$GHA  <- readstata13::read.dta13("../data/GHA_maintable.dta") %>% 
    mutate(id = as.character(1:n()), weight = LSMS_WEIGHT, study = "GHA1") %>% 
    select(one_of(common_vars))

# KEN1
data$KEN1 <- readstata13::read.dta13("../data/KEN1_maintable.dta") %>% 
    dplyr::rename(poor = b_cons) %>%
    mutate(id = as.character(hhid_key), weight = hh_weight, study = "KEN1") %>% 
    select(one_of(common_vars))

# KEN2
data$KEN2 <- readstata13::read.dta13("../data/KEN2_maintable.dta") %>% 
    dplyr::rename(incomedrop = inc3_total_drop, poor = b_inc) %>%
    mutate(id = as.character(1:n()), weight = weight, study = "KEN2") %>% 
    select(one_of(common_vars))

# KEN3
data$KEN3 <- readstata13::read.dta13("../data/KEN3_maintable.dta") %>% 
    dplyr::rename(incomedrop = inc3_total_drop, poor = b_inc) %>%
    mutate(id = as.character(1:n()), weight = hh_weight, study = "KEN3") %>% 
    select(one_of(common_vars))

# NPL1
# We merge two data sets, each with two waves, reshape wide and generate measures. 

NPL_employment <- readstata13::read.dta13("../data/NPL1_IND.dta") %>%
    # Next line gets a few more observations than in stata code     (wagebus = (wage + bus) > 0)
    mutate(wagebus = (wage>0 | bus >0) ) %>%
    group_by(hhid, today, round, base_inc) %>%
    summarize(employed = max(wagebus, na.rm = TRUE)) %>%  # Anyone employed 
    ungroup() %>%
    select(hhid, today, employed)

data$NPL1 <- readstata13::read.dta13("../data/NPL1_HH.dta") %>% 
  
    left_join(NPL_employment, by = c("hhid", "today")) %>%
  
    # Remove day duplicates, keep units with two rounds
   arrange(hhid, round, today) %>% group_by(hhid, round, today) %>% 
    slice(1) %>% ungroup %>%
  
    group_by(hhid) %>% mutate(two_rounds = n()==2) %>% ungroup %>%
  
    filter(two_rounds) %>%
    data.frame() %>%

    # uncomment to remove negative income cases 
    # mutate(base_inc = ifelse(base_inc <0, NA, base_inc)) %>%

    stats::reshape(idvar = "hhid", timevar = "round", direction = "wide") %>%

    mutate(
          poor = 1*(base_inc.3 < median(base_inc.3, na.rm = T)),
          missedmeals = 1*(hunger3.6 >1 | hunger4.6 >1), # Note not >=
          incomedrop = 1*(laborInc_hh.6 < laborInc_hh.3),
          employmentdrop = (employed.3 == 1 & employed.6 == 0),
          id = as.character(1:n()), 
          study = "NPL1") %>% 
  
    select(one_of(common_vars)) %>%
  filter(!is.na(employmentdrop) | !is.na(incomedrop) | !is.na(missedmeals))

# PHL1
data$PHL <- readstata13::read.dta13("../data/PHL_maintable.dta") %>% 
    mutate(id = as.character(1:n()), weight = LSMS_WEIGHT, study = "PHL1") %>% 
    select(one_of(common_vars))

# RWA1
data$RWA <- readstata13::read.dta13("../data/RWA_maintable.dta") %>% 
    mutate(id = as.character(1:n()), weight = LSMS_WEIGHT, study = "RWA1") %>% 
    select(one_of(common_vars))

# SLE1
# Here (unlike Stata) SES cutoff is taken from population baseline  median. 
# weight, cluster information missing
# max over periods used
#SLE_weights <-  read_dta("../data/SLE_covid_sample_town_size.dta") %>%
#  select(hhid, c_total_pop, community_id) %>% distinct %>% arrange(hhid) %>%
#  mutate(hhid =as.character(hhid), weight = c_total_pop, cluster = community_id) 

max2 <- function(x) ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE))

sl_median <-  
    read_dta("../data/SLE1.dta") %>% 
#    filter(covid_sample == 1) %>% 
    group_by(hhid) %>% 
    summarize(hh_cons = mean(hh_consumption, na.rm= TRUE)) %>% 
    ungroup() %>% pull(hh_cons) %>%
    median(na.rm = T)

data$SLE1 <- readstata13::read.dta13("../data/SLE1.dta") %>% 
  mutate(as.character(hhid)) %>%
#    left_join(SLE_weights) %>%
    dplyr::rename(anysupport = ngo_gov_sprt, 
                  accessmarkets = mkts_clsd, 
                  healthaccess = skip_health) %>%
    filter(covid_sample == 1) %>%  # COVID sample
    mutate(
      missedmeals = (reduce_male == 1 | reduce_female == 1 | reduce_boy == 1 | reduce_girl| post_skip == 1),
      pre_empl = ifelse(pre_empl=="Yes", 1, 0),
      employmentdrop = as.numeric(post_empl < pre_empl), 
      incomedrop = as.numeric(post_inc < pre_inc), 
      healthaccess = ifelse(healthaccess=="No", 0, 1),
      poor = as.numeric(hh_consumption <= sl_median)) %>%
  
  # Max outcome across waves is taken
  group_by(hhid) %>% summarize(
    employmentdrop = max2(employmentdrop),
    incomedrop = max2(incomedrop), 
    missedmeals = max2(missedmeals), 
    accessmarkets = max2(accessmarkets), 
    anysupport = max2(anysupport), 
    healthaccess = max2(healthaccess), 
    poor = max2(poor)) %>% ungroup %>%
    
  mutate(id = as.character(hhid), 
           cluster = as.character(hhid), 
#           cluster = ifelse(is.na(cluster), as.character(hhid), cluster), 
           study = "SLE1") %>%
  select(one_of(common_vars))

# SLE2
data$SLE2 <- readstata13::read.dta13("../data/SLE2_maintable.dta") %>% 
    mutate(id = as.character(1:n()), 
           weight = LSMS_WEIGHT, 
           study = "SLE2") %>% 
    select(one_of(common_vars))
```

## Combine datasets

Report on the number of observations for all variables in each dataset.

* Note some tiny samples on some variables
* Cluster information missing in most (SLE 1 clustering here is hh only)
* Weighting information is missing in many


```{r, include = FALSE}
# Data Summary:
dt <- lapply(data, function(df) lapply(df, class) %>% unlist) %>%
    bind_rows %>% mutate(study = names(data))
# Report on the data types for all variables in each dataset.
# kable(dt, caption = "Data types")

ns <- lapply(data, function(df) 
        apply(df, 2, function(j) sum(!is.na(j)))) %>%
    bind_rows %>% mutate(study = names(data))
kable(ns, caption = "Number of observations")
```


Combined data is generated and saved to `tab2_combined_data.csv`. 


```{r, comment = ""}

# Combine
df <- bind_rows(data)

# View data snapshot:
# str(df)
```


Weights and cluster issues: 

* For studies with no weights we set uniform weights.
* For studies with no clusters we set cluster = id.
* Note for combined analyses we standardize so that weights sum to 1 in each study.


```{r}
# If weights missing, use study means or 1 if all missing
df <- df %>% group_by(study) %>% 
    mutate(weight = ifelse(is.na(weight), mean(weight, na.rm = TRUE), weight),
           weight = ifelse(is.na(weight), 1, weight),
           cluster = ifelse(is.na(cluster), id, cluster)) %>% ungroup

# Display
# rbind(head(df), tail(df)) %>% kable
       
```


## Table 2

Analysis is done here first on each study and then combined. 


```{r, message = FALSE}
# Convert to long data set with rows for variables and weighting normalized for each study to sum to 1 (within each ses group).

 df_ses <- bind_rows( # create an "All" df
     mutate(df, ses = ifelse(poor==1, "Lower SES", "Higher SES")),
     mutate(df, ses = "All"))  %>%
     group_by(study, ses) %>%  mutate(weight = weight / mean(weight)) %>% ungroup %>%
     gather(variable, outcome, -cluster, -id, -weight, - study, -poor, -ses) %>%
     filter(!is.na(ses) & (ses!="NA"))

# Remove cases where outcome is missing completely
df_ses <- df_ses %>%
    group_by(variable, study, ses) %>% #group
    filter(!all(is.na(outcome))) %>%
    ungroup()

# Study specific results
study_results <- df_ses %>%
    nest_by(variable, study, ses) %>% #group
    mutate(model = list(lm_robust(outcome ~ 1, data = data, weights = weight))) %>%  summarize(tidy(model)) 
  
results <- bind_rows(study_results)

results$sorting <-(ifelse(results$ses=="All",3,ifelse(results$ses=="Lower SES",2,1)))
results <- results[order(results$variable,results$study, results$sorting),]
results$ses2 <- factor(results$ses, levels=unique(results$ses))
results$stu2 <- factor(results$study, levels=c('BGD1','BGD2','BGD3','BGD4','BGD5','BFA1','COL1','GHA1','KEN1','KEN2','KEN3','NPL1','PHL1','RWA1','SLE1','SLE2'))
results <- results[order(results$variable,results$stu2, results$sorting),]

```


Graph output:

```{r table2, fig.height=8, fig.width = 11, warning = FALSE, message = FALSE, fig.cap  = "Table 2 replication. Dashed line shows median"}
pd <- position_dodge(width=0.6)
cbPalette <- c("#E69F00", "#0072B2", "#999999", "#009E73", "#F0E442", "#56B4E9", "#D55E00", "#CC79A7")
library(forcats)


# Some labels and an ordering for display
varlabs <- c(y1_incomedrop="Drop in income", 
             y2_employmentdrop="Drop in employment", 
             y3_accessmarkets="Reduced access to markets",
             y4_healthaccess="Healthcare access delayed", 
             y5_missedmeals="Missed or reduced meals", 
             y6_anysupport="Received NGO or Govt support")

# get_median 
medians <- results %>% group_by(variable) %>% filter(ses == "All") %>% 
  summarize(median = median(estimate, na.rm = TRUE)) 
medians <- medians[c(5, 3, 1, 4, 6, 2),] 
medians$variable = names(varlabs)

tab2 <- results %>%
  mutate(variable = 
           recode(variable, 
      incomedrop = "y1_incomedrop", employmentdrop = "y2_employmentdrop",
      accessmarkets ="y3_accessmarkets", healthaccess="y4_healthaccess",
      missedmeals="y5_missedmeals", anysupport="y6_anysupport")) %>%
  ggplot(aes(reorder(stu2, desc(stu2)),estimate, color=ses2)) +
  geom_point(aes(),size=2, position=pd) +
  theme_bw() +
  xlab("") + 
  ylab("Share") +
  geom_hline(data = medians, aes(yintercept = median), col = "darkgrey", linetype="dashed")+ 
  geom_errorbar(aes(ymin=conf.low,ymax=conf.high),width=0.1,position=pd) + 
  coord_flip() +
  facet_wrap(~variable, labeller = labeller(variable = varlabs), nrow = 2, ncol = 3) + 
  ggtitle("Share of households experiencing:") + 
  theme(legend.title = element_blank()) + 
  guides(color = guide_legend(reverse = TRUE)) +
  scale_colour_manual(values=cbPalette)

tab2

pdf(paste0(outpath, "../results/Tab_2_Fig.pdf"), width = 10, height = 9)
tab2
dev.off()

```


# Footnotes
